This document conducts an EDA on census data only, first at the tract level and then at the places level.

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0     ✔ purrr   0.2.5
## ✔ tibble  1.4.2     ✔ dplyr   0.7.6
## ✔ tidyr   0.8.1     ✔ stringr 1.3.1
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(sf)
## Linking to GEOS 3.6.1, GDAL 2.1.3, proj.4 4.9.3
library(spdep)
## Loading required package: sp
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
## 
##     expand
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source'))`
library(tidycensus)

data_dir = '~/Google Drive/Coding/EJ datasets/CA pesticide/'

load_sf = function(rds_file) {
    rds_file %>%
        str_c(data_dir, .) %>%
        read_rds() %>%
        ## Remove locations w/ total population or total employed 0
        filter(total_popE != 0, total_employedE != 0) %>%
        ## Population proportions
        mutate(womenP = womenE / total_popE,
               womenPM = moe_prop(womenE, total_popE, womenM, total_popM),
               whiteP = whiteE / total_popE, 
               whitePM = moe_prop(whiteE, total_popE, whiteM, total_popM),
               blackP = blackE / total_popE, 
               blackPM = moe_prop(blackE, total_popE, blackM, total_popM),
               indigenousP = indigenousE / total_popE, 
               indigenousPM = moe_prop(indigenousE, total_popE, indigenousM, total_popM),
               asianP = asianE / total_popE, 
               asianPM = moe_prop(asianE, total_popE, asianM, total_popM),
               hispanicP = hispanicE / total_popE, 
               hispanicPM = moe_prop(hispanicE, total_popE, hispanicM, total_popM),
               noncitizensP = noncitizensE / total_popE,
               noncitizensPM = moe_prop(noncitizensE, total_popE, 
                                        noncitizensM, total_popM),
               childrenP = childrenE / total_popE, 
               childrenPM = moe_prop(childrenE, total_popE, childrenM, total_popM),
               
               poverty_combE = povertyE + extreme_povertyE,
               poverty_combM = sqrt(povertyM^2 + extreme_povertyM^2), 
               poverty_combP = poverty_combE / total_popE, 
               poverty_combPM = moe_prop(poverty_combE, total_popE, 
                                         poverty_combM, total_popM), 
               hisp_povertyP = hisp_povertyE / hispanicE, 
               hisp_povertyPM = moe_prop(hisp_povertyE, hispanicE, hisp_povertyM, hispanicM), 
               ag_employedP = ag_employedE / total_employedE,
               ag_employedPM = moe_prop(ag_employedE, total_employedE, ag_employedM, total_employedM)
        ) %>%
        ## Population densities
        mutate_at(vars(womenE, whiteE, blackE, indigenousE, asianE, hispanicE, 
                       noncitizensE, childrenE, poverty_combE, 
                       hisp_povertyP, ag_employedP), 
                  funs(D = . / units::drop_units(area)))
}

Tracts

tracts_sf = load_sf('02_tracts_sf.Rds')
## Warning in sqrt(x): NaNs produced

## Warning in sqrt(x): NaNs produced

## Warning in sqrt(x): NaNs produced

## Warning in sqrt(x): NaNs produced
## Gives a warning about NaNs; 
## but there aren't any in the output
# as.data.frame() %>%
# select(-geometry) %>%
# transmute_all(funs(is.nan)) %>%
# summarize_all(sum)

glimpse(tracts_sf)
## Observations: 1,044
## Variables: 72
## $ GEOID            <chr> "06007000501", "06007000903", "06007003300", ...
## $ NAME             <chr> "Census Tract 5.01, Butte County, California"...
## $ total_popE       <dbl> 3928, 6473, 4544, 6215, 2912, 4845, 5536, 324...
## $ total_popM       <dbl> 347, 767, 455, 261, 175, 477, 558, 341, 272, ...
## $ womenE           <dbl> 2243, 3200, 2083, 2888, 434, 2287, 2995, 1811...
## $ womenM           <dbl> 314, 451, 257, 205, 118, 306, 332, 202, 155, ...
## $ menE             <dbl> 1685, 3273, 2461, 3327, 2478, 2558, 2541, 143...
## $ menM             <dbl> 224, 524, 313, 240, 194, 312, 371, 211, 176, ...
## $ whiteE           <dbl> 2743, 4200, 3140, 1717, 710, 32, 1832, 332, 5...
## $ whiteM           <dbl> 367, 604, 405, 278, 109, 25, 322, 105, 109, 1...
## $ blackE           <dbl> 24, 265, 159, 26, 436, 859, 157, 261, 160, 35...
## $ blackM           <dbl> 28, 171, 159, 56, 95, 305, 175, 149, 96, 151,...
## $ indigenousE      <dbl> 5, 62, 250, 75, 113, 66, 142, 81, 29, 89, 9, ...
## $ indigenousM      <dbl> 8, 49, 136, 37, 44, 74, 113, 97, 26, 70, 15, ...
## $ asianE           <dbl> 91, 569, 118, 143, 166, 1010, 63, 586, 549, 1...
## $ asianM           <dbl> 77, 208, 100, 111, 54, 330, 87, 252, 295, 328...
## $ hispanicE        <dbl> 834, 1050, 680, 4260, 1537, 2884, 3390, 2020,...
## $ hispanicM        <dbl> 293, 236, 220, 280, 158, 483, 485, 386, 195, ...
## $ noncitizensE     <dbl> 114, 330, 304, 1309, 286, 1113, 258, 384, 367...
## $ noncitizensM     <dbl> 80, 128, 234, 290, 72, 324, 191, 129, 157, 32...
## $ childrenE        <dbl> 173, 367, 216, 357, 0, 531, 528, 358, 271, 56...
## $ childrenM        <dbl> 77, 136, 77, 142, 12, 164, 278, 123, 88, 179,...
## $ extreme_povertyE <dbl> 732, 588, 218, 144, 103, 1619, 950, 234, 325,...
## $ extreme_povertyM <dbl> 253, 241, 103, 111, 53, 482, 490, 135, 211, 5...
## $ povertyE         <dbl> 291, 730, 771, 697, 269, 1352, 1023, 880, 462...
## $ povertyM         <dbl> 158, 282, 304, 313, 65, 577, 391, 261, 245, 4...
## $ hisp_povertyE    <dbl> 332, 323, 246, 738, 147, 1781, 1438, 717, 487...
## $ hisp_povertyM    <dbl> 142, 154, 230, 327, 71, 504, 563, 257, 253, 5...
## $ total_employedE  <dbl> 1963, 3207, 1433, 2775, 138, 1255, 2210, 1308...
## $ total_employedM  <dbl> 253, 425, 242, 268, 62, 255, 397, 188, 175, 2...
## $ ag_employedE     <dbl> 42, 60, 77, 794, 0, 247, 64, 26, 83, 24, 77, ...
## $ ag_employedM     <dbl> 36, 37, 56, 196, 12, 136, 83, 24, 79, 38, 63,...
## $ area             <S3: units> 5.4468555 km^2, 5.9151835 km^2, 309.810...
## $ densityE         <dbl> 721.150027, 1094.302488, 14.667041, 6.526313,...
## $ densityM         <dbl> 63.7064816, 129.6663075, 1.4686407, 0.2740736...
## $ density_log10    <dbl> 2.8580256, 3.0391374, 1.1663425, 0.8146679, 3...
## $ womenP           <dbl> 0.5710285, 0.4943612, 0.4584067, 0.4646822, 0...
## $ womenPM          <dbl> 0.06201256, 0.03772393, 0.03304389, 0.0265928...
## $ whiteP           <dbl> 0.698319756, 0.648849065, 0.691021127, 0.2762...
## $ whitePM          <dbl> 0.070170384, 0.052875339, 0.056179824, 0.0431...
## $ blackP           <dbl> 0.006109980, 0.040939286, 0.034991197, 0.0041...
## $ blackPM          <dbl> 0.007107845, 0.025968218, 0.034815337, 0.0090...
## $ indigenousP      <dbl> 0.001272912, 0.009578248, 0.055017606, 0.0120...
## $ indigenousPM     <dbl> 0.002033553, 0.007484341, 0.029418196, 0.0059...
## $ asianP           <dbl> 0.02316701, 0.08790360, 0.02596831, 0.0230088...
## $ asianPM          <dbl> 0.019495725, 0.030398513, 0.021852884, 0.0178...
## $ hispanicP        <dbl> 0.21232179, 0.16221227, 0.14964789, 0.6854384...
## $ hispanicPM       <dbl> 0.07219597, 0.03098106, 0.04603828, 0.0346572...
## $ noncitizensP     <dbl> 0.02902240, 0.05098100, 0.06690141, 0.2106194...
## $ noncitizensPM    <dbl> 0.02020458, 0.01882915, 0.05105890, 0.0458153...
## $ childrenP        <dbl> 0.044042770, 0.056697049, 0.047535211, 0.0574...
## $ childrenPM       <dbl> 0.019212858, 0.019907315, 0.016263200, 0.0227...
## $ poverty_combE    <dbl> 1023, 1318, 989, 841, 372, 2971, 1973, 1114, ...
## $ poverty_combM    <dbl> 298.28342, 370.95148, 320.97508, 332.09938, 8...
## $ poverty_combP    <dbl> 0.26043788, 0.20361502, 0.21764965, 0.1353177...
## $ poverty_combPM   <dbl> 0.07236859, 0.05198123, 0.06719105, 0.0531321...
## $ hisp_povertyP    <dbl> 0.39808153, 0.30761905, 0.36176471, 0.1732394...
## $ hisp_povertyPM   <dbl> 0.09711196, 0.12934693, 0.31733956, 0.0759113...
## $ ag_employedP     <dbl> 0.021395823, 0.018709074, 0.053733426, 0.2861...
## $ ag_employedPM    <dbl> 0.018130769, 0.011267702, 0.038010707, 0.0650...
## $ womenE_D         <dbl> 411.797228, 540.980683, 6.723470, 3.032662, 5...
## $ whiteE_D         <dbl> 503.593311, 710.037147, 10.135235, 1.803005, ...
## $ blackE_D         <dbl> 4.40621198, 44.79996283, 0.51321732, 0.027302...
## $ indigenousE_D    <dbl> 0.91796083, 10.48150074, 0.80694547, 0.078756...
## $ asianE_D         <dbl> 16.70688709, 96.19312773, 0.38087826, 0.15016...
## $ hispanicE_D      <dbl> 153.115866, 177.509287, 2.194892, 4.473386, 1...
## $ noncitizensE_D   <dbl> 20.92950691, 55.78863295, 0.98124569, 1.37456...
## $ childrenE_D      <dbl> 31.7614447, 62.0437221, 0.6972009, 0.3748823,...
## $ poverty_combE_D  <dbl> 187.8147857, 222.8164189, 3.1922763, 0.883126...
## $ hisp_povertyP_D  <dbl> 0.0730846512, 0.0520049883, 0.0011676976, 0.0...
## $ ag_employedP_D   <dbl> 3.928105e-03, 3.162890e-03, 1.734398e-04, 3.0...
## $ geometry         <MULTIPOLYGON [m]> MULTIPOLYGON (((593948.5 44..., ...
## Distributions of proportions ----
tracts_sf %>%
    as.data.frame() %>%
    select(ends_with('P')) %>%
    gather(key = variable, value) %>%
    ggplot(aes(value, color = variable, fill = variable)) +
    geom_density() +
    geom_rug() +
    facet_wrap(~ variable, scales = 'free') +
    scale_x_continuous(labels = scales::percent_format())

There are a few tracts with a modest proportion of Asian and Black residents (\(> 20\%\)); but only a few. Almost no tracts have more than 5% Indigenous residents, and none have more than about 20%. Children are typically ~5-12% of the population. The poverty rate varies dramatically, with a median somewhere around 30% and some values above 50%. Hispanic and White proportions are the most diverse. Very little variation in proportion of women, though there are a few extreme tracks with values < 25% or > 80%

tracts_sf %>%
    mutate(w_plus_h = whiteP + hispanicP) %>%
    ggplot(aes(w_plus_h)) + 
    geom_density() + 
    geom_rug()

In most tracts, a supermajority of people are either White or Hispanic.

ggplot(tracts_sf, aes(hispanicP, poverty_combP)) + 
    geom_point() +
    geom_smooth(method = 'lm')

A greater Hispanic proportion is associated with a greater poverty rate.

## Correlations ----
tracts_sf %>%
    as_tibble() %>%
    select(densityE, ends_with('P')) %>%
    cor() %>%
    as.data.frame() %>%
    rownames_to_column(var = 'var1') %>%
    as_tibble() %>%
    gather(key = 'var2', value = 'cor', -var1) %>%
    mutate(cor.print = round(cor, digits = 1)) %>%
    ggplot(aes(var1, var2, fill = cor, label = cor.print)) + 
    geom_tile() +
    geom_text() +
    scale_fill_gradient2()

tracts_sf %>%
    as_tibble() %>%
    select(densityE, ends_with('P'), -whiteP) %>%
    cor() %>%
    as.data.frame() %>%
    rownames_to_column(var = 'var1') %>%
    as_tibble() %>%
    gather(key = 'var2', value = 'cor', -var1) %>%
    filter(abs(cor) > .4, var1 < var2) %>%
    arrange(desc(abs(cor)))
## # A tibble: 12 x 3
##    var1          var2            cor
##    <chr>         <chr>         <dbl>
##  1 hispanicP     noncitizensP  0.847
##  2 hisp_povertyP poverty_combP 0.842
##  3 ag_employedP  noncitizensP  0.743
##  4 ag_employedP  hispanicP     0.672
##  5 hispanicP     poverty_combP 0.600
##  6 noncitizensP  poverty_combP 0.598
##  7 childrenP     hispanicP     0.560
##  8 childrenP     poverty_combP 0.481
##  9 childrenP     noncitizensP  0.470
## 10 hisp_povertyP noncitizensP  0.446
## 11 hisp_povertyP hispanicP     0.444
## 12 ag_employedP  poverty_combP 0.420

White proportion has moderate to very strong negative corelations with every other variable (except Indigenous). Very strong correlation between Hispanic and noncitizen proportion and between Hispanic and general poverty. Strong correlations between agricultural employment and noncitizens and Hispanic. Moderate correlations between each pair of poverty, children, noncitizens, and Hispanic, and between agricultural employment and poverty.

ggplot(tracts_sf, aes((densityE), hispanicP)) + 
    geom_point() +
    geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

No indication of a relationship between population density and Hispanic, linear or nonlinear.

## White/Hispanic segregation ----
## Evenness/dissimilarity
tracts_sf %>%
    as.data.frame() %>%
    mutate(w_h_dissim = abs(hispanicE/sum(hispanicE) - whiteE/sum(whiteE))) %>%
    summarize(w_h_dissim = .5 * sum(w_h_dissim))
##   w_h_dissim
## 1  0.4535279

Evenness is moderate, at 47%

ggplot(tracts_sf, aes(abs(hispanicP - whiteP))) + 
    geom_density() +
    geom_rug()

## Exposure/interaction
tracts_sf %>%
    as.data.frame() %>%
    mutate(w_h_exposure = abs(whiteE/sum(whiteE) * hispanicE / total_popE), 
           h_w_exposure = abs(hispanicE/sum(hispanicE) * whiteE/total_popE)) %>%
    summarize(w_h_exposure = sum(w_h_exposure), 
              h_w_exposure = sum(h_w_exposure))
##   w_h_exposure h_w_exposure
## 1    0.3185166    0.2919732

Exposure is moderate-low, at about 30% in both directions

## Correlation
ggplot(tracts_sf, aes(hispanicP, whiteP)) + 
    geom_point()

with(tracts_sf, cor(hispanicP, whiteP))
## [1] -0.8582687

Very strong negative correlation between the two proportions. But I guess, in this kind of two-group context, we would get a very strong negative correlation even if dissimilarity were low and exposure were high.

tracts_sf %>%
    as.data.frame() %>%
    as_tibble() %>%
    summarize_at(vars(whiteE, hispanicE), funs(sum)) %>%
    mutate(hw_ratio = hispanicE / whiteE)
## # A tibble: 1 x 3
##    whiteE hispanicE hw_ratio
##     <dbl>     <dbl>    <dbl>
## 1 2142773   2337573     1.09

About 20% more Hispanics than Whites

## Spatial weights ----
coords_tracts = tracts_sf %>%
    st_centroid() %>%
    st_coordinates()
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant
## over geometries of x
## Contiguity
weights_tracts_contig = tracts_sf %>%
    pull(geometry) %>%
    as_Spatial() %>%
    poly2nb() %>%
    nb2listw(style = 'W')
## KNN
weights_tracts_knn = 3:8 %>%
    set_names() %>%
    map(~ {knearneigh(coords_tracts, k = .x) %>%
            knn2nb() %>%
            nb2listw(style = 'W')})
## Inverse spatial weights w/in 50 km
dnn_tracts = dnearneigh(coords_tracts, d1 = 0, d2 = 50 * 1000)
weights_tracts_d = nbdists(dnn_tracts, coords = coords_tracts) %>%
    map( ~ 1/.) %>%
    nb2listw(dnn_tracts, glist = ., style = 'W', 
             zero.policy = TRUE)

weights_tracts = c(weights_tracts_knn, 
                   contiguity = list(weights_tracts_contig),
                   distance = list(weights_tracts_d))

plot(tracts_sf, max.plot = 1)
plot(weights_tracts$contiguity, coords = coords_tracts, 
     add = TRUE, col = 'blue')

## Moran's I ----
moran.i = function(vec, weights, ...) {
    moran.test(vec, 
               weights, ...)$estimate['Moran I statistic']
}

## Moran's I for overall density
moran_i_tracts = weights_tracts %>%
    map_dfr(~moran.i(log10(tracts_sf$densityE), .)) %>%
    gather(key = 'k', value = 'I')
moran_i_tracts
## # A tibble: 8 x 2
##   k              I
##   <chr>      <dbl>
## 1 3          0.458
## 2 4          0.449
## 3 5          0.446
## 4 6          0.433
## 5 7          0.423
## 6 8          0.411
## 7 contiguity 0.662
## 8 distance   0.260

Moderate population clustering, ~.40-.45 for KNN

weights_tracts %>%
    tibble(weights = ., k = names(.)) %>%
    crossing(tibble(variable = c('womenE_D',
                                 'whiteE_D', 
                                 'blackE_D', 
                                 'indigenousE_D', 
                                 'asianE_D', 
                                 'hispanicE_D', 
                                 'noncitizensE_D', 
                                 'childrenE_D', 
                                 'poverty_combE_D'))) %>%
    rowwise() %>%
    mutate(var_value = {tracts_sf %>% 
            as.data.frame() %>%
            pull(variable) %>%
            {. + 1} %>% log10() %>%
            list()}, 
           moran_i = moran.i(var_value, weights)) %>%
    select(k, variable, moran_i) %>%
    arrange(desc(moran_i)) %>%
    mutate(variable = fct_inorder(variable)) %>%
    ggplot(aes(variable, moran_i, color = k, group = k)) + 
    geom_point() +
    geom_line() +
    geom_hline(data = moran_i_tracts, 
               aes(yintercept = I, color = k), 
               linetype = 'dashed') +
    coord_flip()

The 6 KNN neighborings all give similar values of Moran’s \(I\), with slightly lower values as \(K\) increases. The dashed lines correspond to the values of \(I\) for total population density, calculated above. Distance-based weights have consistently lower values of Moran’s \(I\), but order the groups in basically the same way. Continuity weights have consistently higher values of I, with almost no difference between different groups.

Asian and black residents have moderate-high clustering. White, Hispanic, and noncitizen residents have moderate clustering. Children and impoverished residents seem to have clustering values the same as or just above the overall population. Indigenous people have weak positive clustering.

Places

places_sf = load_sf('02_places_sf.Rds')
## Warning in sqrt(x): NaNs produced

## Warning in sqrt(x): NaNs produced

## Warning in sqrt(x): NaNs produced

## Warning in sqrt(x): NaNs produced

## Warning in sqrt(x): NaNs produced

## Warning in sqrt(x): NaNs produced
glimpse(places_sf)
## Observations: 391
## Variables: 73
## $ GEOID            <chr> "0605290", "0619402", "0660984", "0675630", "...
## $ name             <chr> "Benicia city, California", "Dixon city, Cali...
## $ county           <chr> "Solano", "Solano", "Solano", "Solano", "Yuba...
## $ total_popE       <dbl> 27657, 18908, 7826, 28867, 12161, 46952, 7032...
## $ total_popM       <dbl> 44, 47, 28, 66, 40, 47, 30, 25, 55, 29, 32, 1...
## $ womenE           <dbl> 14250, 9394, 4068, 14474, 5799, 23107, 3861, ...
## $ womenM           <dbl> 385, 458, 226, 441, 244, 508, 291, 365, 191, ...
## $ menE             <dbl> 13407, 9514, 3758, 14393, 6362, 23845, 3171, ...
## $ menM             <dbl> 389, 459, 228, 441, 244, 511, 293, 366, 189, ...
## $ whiteE           <dbl> 16809, 9523, 5646, 7393, 7316, 13366, 3649, 2...
## $ whiteM           <dbl> 725, 848, 449, 693, 521, 909, 474, 544, 288, ...
## $ blackE           <dbl> 1510, 405, 544, 6260, 642, 804, 103, 194, 19,...
## $ blackM           <dbl> 402, 261, 238, 667, 284, 316, 138, 179, 20, 2...
## $ indigenousE      <dbl> 161, 85, 0, 189, 105, 523, 54, 13, 308, 85, 9...
## $ indigenousM      <dbl> 99, 99, 17, 109, 71, 225, 54, 21, 194, 64, 14...
## $ asianE           <dbl> 3430, 477, 784, 5838, 704, 2699, 100, 165, 85...
## $ asianM           <dbl> 459, 192, 247, 636, 209, 532, 104, 94, 319, 2...
## $ hispanicE        <dbl> 4349, 7538, 716, 7231, 3010, 28503, 2868, 715...
## $ hispanicM        <dbl> 520, 814, 298, 583, 413, 943, 438, 511, 349, ...
## $ noncitizensE     <dbl> 1316, 2140, 242, 2714, 762, 7309, 542, 1926, ...
## $ noncitizensM     <dbl> 285, 527, 141, 478, 309, 637, 236, 426, 303, ...
## $ childrenE        <dbl> 1473, 1219, 256, 1972, 1038, 3919, 550, 991, ...
## $ childrenM        <dbl> 253, 275, 136, 302, 199, 425, 195, 306, 165, ...
## $ extreme_povertyE <dbl> 973, 707, 391, 2105, 1297, 3141, 856, 634, 61...
## $ extreme_povertyM <dbl> 289, 359, 201, 633, 642, 598, 544, 360, 282, ...
## $ povertyE         <dbl> 514, 1901, 522, 2019, 2019, 5122, 844, 1452, ...
## $ povertyM         <dbl> 185, 768, 243, 538, 655, 922, 430, 694, 391, ...
## $ hisp_povertyE    <dbl> 145, 1371, 66, 1794, 909, 5221, 1167, 1762, 1...
## $ hisp_povertyM    <dbl> 90, 641, 82, 636, 382, 892, 583, 727, 410, 67...
## $ total_employedE  <dbl> 13566, 8916, 2531, 13241, 4064, 18324, 2730, ...
## $ total_employedM  <dbl> 479, 525, 276, 650, 401, 758, 324, 386, 221, ...
## $ ag_employedE     <dbl> 101, 378, 86, 92, 104, 632, 194, 197, 414, 41...
## $ ag_employedM     <dbl> 53, 169, 95, 70, 77, 172, 116, 140, 107, 205,...
## $ area             <S3: units> 40.687430 km^2, 18.699253 km^2, 18.2224...
## $ densityE         <dbl> 679.7431, 1011.1634, 429.4712, 2678.8501, 131...
## $ densityM         <dbl> 1.0814151, 2.5134694, 1.5365696, 6.1247829, 4...
## $ density_log10    <dbl> 2.832345, 3.004821, 2.632934, 3.427948, 3.117...
## $ womenP           <dbl> 0.5152403, 0.4968267, 0.5198058, 0.5014030, 0...
## $ womenPM          <dbl> 0.013896372, 0.024191049, 0.028818151, 0.0152...
## $ whiteP           <dbl> 0.60776657, 0.50364925, 0.72144135, 0.2561055...
## $ whitePM          <dbl> 0.026196140, 0.044831264, 0.057314767, 0.0239...
## $ blackP           <dbl> 0.054597389, 0.021419505, 0.069511883, 0.2168...
## $ blackPM          <dbl> 0.014534940, 0.013803578, 0.030410432, 0.0231...
## $ indigenousP      <dbl> 0.005821311, 0.004495452, 0.000000000, 0.0065...
## $ indigenousPM     <dbl> 0.003579552, 0.005235867, 0.002172246, 0.0037...
## $ asianP           <dbl> 0.124019236, 0.025227417, 0.100178891, 0.2022...
## $ asianPM          <dbl> 0.016594987, 0.010154238, 0.031559427, 0.0220...
## $ hispanicP        <dbl> 0.15724771, 0.39866723, 0.09148991, 0.2504936...
## $ hispanicPM       <dbl> 0.018800086, 0.043039154, 0.038076794, 0.0201...
## $ noncitizensP     <dbl> 0.04758289, 0.11317961, 0.03092257, 0.0940173...
## $ noncitizensPM    <dbl> 0.010304527, 0.027870380, 0.018016527, 0.0165...
## $ childrenP        <dbl> 0.05325957, 0.06447007, 0.03271147, 0.0683133...
## $ childrenPM       <dbl> 0.009147382, 0.014543225, 0.017377577, 0.0104...
## $ poverty_combE    <dbl> 1487, 2608, 913, 4124, 3316, 8263, 1700, 2086...
## $ poverty_combM    <dbl> 343.14137, 847.76471, 315.35694, 830.74244, 9...
## $ poverty_combP    <dbl> 0.05376577, 0.13793103, 0.11666241, 0.1428620...
## $ poverty_combPM   <dbl> 0.01240674, 0.04483499, 0.04029390, 0.0287764...
## $ hisp_povertyP    <dbl> 0.03334100, 0.18187848, 0.09217877, 0.2480984...
## $ hisp_povertyPM   <dbl> 0.02030681, 0.08273661, 0.10790802, 0.0856498...
## $ ag_employedP     <dbl> 0.007445083, 0.042395693, 0.033978665, 0.0069...
## $ ag_employedPM    <dbl> 0.003897972, 0.018789579, 0.037351236, 0.0052...
## $ womenE_D         <dbl> 350.23102, 502.37301, 223.24162, 1343.18344, ...
## $ whiteE_D         <dbl> 413.12514, 509.27168, 309.83829, 686.06848, 7...
## $ blackE_D         <dbl> 37.112199, 21.658619, 29.853353, 580.926374, ...
## $ indigenousE_D    <dbl> 3.956996, 4.545636, 0.000000, 17.539151, 11.3...
## $ asianE_D         <dbl> 84.301221, 25.509040, 43.023950, 541.764884, ...
## $ hispanicE_D      <dbl> 106.88805, 403.11771, 39.29228, 671.03492, 32...
## $ noncitizensE_D   <dbl> 32.344142, 114.443075, 13.280352, 251.858495,...
## $ childrenE_D      <dbl> 36.202827, 65.189770, 14.048637, 183.001088, ...
## $ poverty_combE_D  <dbl> 36.54691, 139.47081, 50.10315, 382.70613, 357...
## $ hisp_povertyP_D  <dbl> 0.0008194422, 0.0097265106, 0.0050585393, 0.0...
## $ ag_employedP_D   <dbl> 1.829824e-04, 2.267240e-03, 1.864664e-03, 6.4...
## $ geometry         <MULTIPOLYGON [m]> MULTIPOLYGON (((570293.6 42..., ...
## Distributions of proportions ----
places_sf %>%
    as.data.frame() %>%
    select(ends_with('P')) %>%
    gather(key = variable, value) %>%
    ggplot(aes(value, color = variable, fill = variable)) +
    geom_density() +
    geom_rug() +
    facet_wrap(~ variable, scales = 'free') +
    scale_x_continuous(labels = scales::percent_format())
## Warning: Removed 26 rows containing non-finite values (stat_density).

Slightly higher proportions across the board. But no dramatic differences.

places_sf %>%
    mutate(w_plus_h = whiteP + hispanicP) %>%
    ggplot(aes(w_plus_h)) + 
    geom_density() + 
    geom_rug()

Again, white+Hispanic supermajority

ggplot(places_sf, aes(hispanicP, poverty_combP)) + 
    geom_point() +
    geom_smooth(method = 'lm')

Again, greater Hispanic proportion is associated with a greater poverty rate.

## Correlations ----
places_sf %>%
    as_tibble() %>%
    select(densityE, womenP, whiteP, blackP, childrenP, hispanicP, indigenousP, noncitizensP, poverty_combP, whiteP) %>%
    cor() %>%
    as.data.frame() %>%
    rownames_to_column(var = 'var1') %>%
    as_tibble() %>%
    gather(key = 'var2', value = 'cor', -var1) %>%
    mutate(cor.print = round(cor, digits = 1)) %>%
    ggplot(aes(var1, var2, fill = cor, label = cor.print)) + 
    geom_tile() +
    geom_text() +
    scale_fill_gradient2()

places_sf %>%
    as_tibble() %>%
    select(densityE, ends_with('P'), -whiteP) %>%
    cor() %>%
    as.data.frame() %>%
    rownames_to_column(var = 'var1') %>%
    as_tibble() %>%
    gather(key = 'var2', value = 'cor', -var1) %>%
    filter(abs(cor) > .4, var1 < var2) %>%
    arrange(desc(abs(cor)))
## # A tibble: 7 x 3
##   var1         var2            cor
##   <chr>        <chr>         <dbl>
## 1 hispanicP    noncitizensP  0.847
## 2 ag_employedP noncitizensP  0.701
## 3 ag_employedP hispanicP     0.617
## 4 hispanicP    poverty_combP 0.595
## 5 noncitizensP poverty_combP 0.566
## 6 ag_employedP poverty_combP 0.499
## 7 densityE     hispanicP     0.472

White is still anticorrelated with everything except Indigenous. Strong or moderate correlations between noncitizens, poverty, or Hispanic. (Not children.) Moderate correlations between Hispanic and density, and moderate-weak between Hispanic and children and density and noncitizens.

ggplot(places_sf, aes((densityE), hispanicP)) + 
    geom_point() +
    geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Monotonic nonlinear relationship between density and Hispanic.

## White/Hispanic segregation ----
## Evenness/dissimilarity
places_sf %>%
    as.data.frame() %>%
    mutate(w_h_dissim = abs(hispanicE/sum(hispanicE) - whiteE/sum(whiteE))) %>%
    summarize(w_h_dissim = .5 * sum(w_h_dissim))
##   w_h_dissim
## 1   0.357544

36% dissimilarity, lower than with tracts

ggplot(places_sf, aes(abs(hispanicP - whiteP))) + 
    geom_density() +
    geom_rug()

## Exposure/interaction
places_sf %>%
    as.data.frame() %>%
    mutate(w_h_exposure = abs(whiteE/sum(whiteE) * hispanicE / total_popE), 
           h_w_exposure = abs(hispanicE/sum(hispanicE) * whiteE/total_popE)) %>%
    summarize(w_h_exposure = sum(w_h_exposure), 
              h_w_exposure = sum(h_w_exposure))
##   w_h_exposure h_w_exposure
## 1    0.3518653    0.3049761

Slightly higher White-Hispanic exposure, but still moderate-low

## Correlation
ggplot(places_sf, aes(hispanicP, whiteP)) + 
    geom_point()

with(tracts_sf, cor(hispanicP, whiteP))
## [1] -0.8582687

Very strong negative correlation between the two proportions. But I guess, in this kind of two-group context, we would get a very strong negative correlation even if dissimilarity were low and exposure were high.

places_sf %>%
    as.data.frame() %>%
    as_tibble() %>%
    summarize_at(vars(whiteE, hispanicE), funs(sum)) %>%
    mutate(hw_ratio = hispanicE / whiteE)
## # A tibble: 1 x 3
##    whiteE hispanicE hw_ratio
##     <dbl>     <dbl>    <dbl>
## 1 1787371   2062174     1.15

Hispanic-white ratio slightly higher, at 27%

## Spatial weights ----
library(spdep)

coords_places = places_sf %>%
    st_centroid() %>%
    st_coordinates()
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant
## over geometries of x
## Contiguity
weights_places_contig = places_sf %>%
    pull(geometry) %>%
    as_Spatial() %>%
    poly2nb() %>%
    nb2listw(style = 'W', zero.policy = TRUE)
## KNN
weights_places_knn = 3:8 %>%
    set_names() %>%
    map(~ {knearneigh(coords_places, k = .x) %>%
            knn2nb() %>%
            nb2listw(style = 'W')})
## Inverse spatial weights w/in 50 km
dnn_places = dnearneigh(coords_places, d1 = 0, d2 = 50 * 1000)
weights_places_d = nbdists(dnn_places, coords = coords_places) %>%
    map( ~ 1/.) %>%
    nb2listw(dnn_places, glist = ., style = 'W', zero.policy = TRUE)

weights_places = c(weights_places_knn, 
                   contiguity = list(weights_places_contig),
                   distance = list(weights_places_d))

plot(places_sf, max.plot = 1)
plot(weights_places$contiguity, coords = coords_places, 
     add = TRUE, col = 'blue')

## All systems of neighbors produce an archipelago of tight clusters and longer connections.  Neither seems to produce ridiculously extended "neighbor" connections.  
## 
## Contiguity weights produce large numbers of islands:  246/397 (62%) have no neighbors

weights_places$contiguity$neighbours
## Neighbour list object:
## Number of regions: 391 
## Number of nonzero links: 240 
## Percentage nonzero weights: 0.1569848 
## Average number of links: 0.6138107 
## 242 regions with no links:
## ID2 ID3 ID5 ID7 ID8 ID9 ID12 ID13 ID14 ID15 ID21 ID22 ID23 ID25 ID27 ID33 ID34 ID35 ID36 ID37 ID41 ID42 ID44 ID45 ID49 ID50 ID51 ID52 ID53 ID54 ID55 ID56 ID57 ID58 ID59 ID60 ID61 ID62 ID63 ID64 ID65 ID66 ID67 ID68 ID74 ID75 ID77 ID78 ID80 ID81 ID82 ID85 ID86 ID87 ID88 ID90 ID91 ID92 ID93 ID97 ID98 ID99 ID101 ID102 ID104 ID106 ID107 ID110 ID111 ID112 ID113 ID114 ID115 ID117 ID118 ID119 ID121 ID123 ID124 ID129 ID130 ID131 ID132 ID134 ID135 ID136 ID138 ID139 ID140 ID141 ID142 ID144 ID146 ID147 ID148 ID150 ID151 ID152 ID153 ID155 ID156 ID157 ID158 ID159 ID160 ID161 ID164 ID167 ID168 ID170 ID171 ID172 ID173 ID175 ID176 ID178 ID181 ID182 ID183 ID184 ID186 ID191 ID192 ID193 ID198 ID199 ID203 ID204 ID205 ID206 ID208 ID209 ID210 ID212 ID215 ID219 ID222 ID223 ID224 ID225 ID226 ID230 ID232 ID233 ID235 ID236 ID237 ID240 ID241 ID242 ID243 ID244 ID246 ID247 ID248 ID250 ID251 ID254 ID256 ID257 ID259 ID261 ID262 ID263 ID265 ID267 ID268 ID269 ID270 ID272 ID275 ID276 ID279 ID280 ID281 ID284 ID291 ID295 ID297 ID298 ID301 ID302 ID303 ID304 ID305 ID306 ID308 ID310 ID311 ID312 ID313 ID314 ID315 ID316 ID318 ID320 ID321 ID322 ID323 ID328 ID329 ID331 ID332 ID333 ID334 ID335 ID339 ID340 ID341 ID343 ID344 ID345 ID346 ID347 ID348 ID349 ID351 ID355 ID356 ID357 ID359 ID360 ID361 ID362 ID363 ID364 ID365 ID366 ID368 ID371 ID372 ID373 ID374 ID375 ID376 ID379 ID380 ID383 ID387 ID389 ID390 ID391
## Moran's I ----
moran.i = function(vec, weights, ...) {
    moran.test(vec, 
               weights, ...)$estimate['Moran I statistic']
}

## Moran's I for overall density
moran_i_places = weights_places %>%
    map_dfr(~moran.i(log10(places_sf$densityE), ., 
                     zero.policy = TRUE)) %>%
    gather(key = 'k', value = 'I')
moran_i_places
## # A tibble: 8 x 2
##   k              I
##   <chr>      <dbl>
## 1 3          0.522
## 2 4          0.498
## 3 5          0.477
## 4 6          0.461
## 5 7          0.445
## 6 8          0.426
## 7 contiguity 0.282
## 8 distance   0.389

Higher moderate population clustering, .45-.53. Distance weights are more consistent w/ KNN here. Contiguity weights are much lower.

weights_places %>%
    tibble(weights = ., k = names(.)) %>%
    crossing(tibble(variable = c('womenE_D',
                                 'whiteE_D', 
                                 'blackE_D', 
                                 'indigenousE_D', 
                                 'asianE_D', 
                                 'hispanicE_D', 
                                 'noncitizensE_D', 
                                 'childrenE_D', 
                                 'poverty_combE_D'))) %>%
    rowwise() %>%
    mutate(var_value = {places_sf %>% 
            as.data.frame() %>%
            pull(variable) %>%
            {. + 1} %>% log10() %>%
            list()}, 
           moran_i = moran.i(var_value, weights, 
                             zero.policy = TRUE)) %>%
    select(k, variable, moran_i) %>%
    arrange(desc(moran_i)) %>%
    mutate(variable = fct_inorder(variable)) %>%
    ggplot(aes(variable, moran_i, color = k, group = k)) + 
    geom_point() +
    geom_line() +
    geom_hline(data = moran_i_places, 
               aes(yintercept = I, color = k), 
               linetype = 'dashed') +
    coord_flip()

With places, most groups have low and below-average clustering. Impoverished people, noncitizens, and Hispanics have moderate clustering, and Hispanics and noncitizens are above the overall average. Distance values are generally similar to but a bit lower than the KNN. Contiguity values are generally quite a bit lower.